library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
# Read the California national parks visitor data from the lesson repository.
ca <- read_csv(
  file = "https://raw.githubusercontent.com/ScienceParkStudyGroup/r-lesson-based-on-ohi-data-training/gh-pages/data/ca.csv"
)
Rows: 789 Columns: 7
── Column specification ────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (5): region, state, code, park_name, type
dbl (2): visitors, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the data, then list its column names.
head(ca)
colnames(ca)
[1] "region" "state" "code" "park_name" "type" "visitors" "year"
# Show the structure of the data: each column's type and its first values.
str(ca)
spec_tbl_df [789 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ region : chr [1:789] "PW" "PW" "PW" "PW" ...
$ state : chr [1:789] "CA" "CA" "CA" "CA" ...
$ code : chr [1:789] "CHIS" "CHIS" "CHIS" "CHIS" ...
$ park_name: chr [1:789] "Channel Islands National Park" "Channel Islands National Park" "Channel Islands National Park" "Channel Islands National Park" ...
$ type : chr [1:789] "National Park" "National Park" "National Park" "National Park" ...
$ visitors : num [1:789] 1200 1500 1600 300 15700 ...
$ year : num [1:789] 1963 1964 1965 1966 1967 ...
- attr(*, "spec")=
.. cols(
.. region = col_character(),
.. state = col_character(),
.. code = col_character(),
.. park_name = col_character(),
.. type = col_character(),
.. visitors = col_double(),
.. year = col_double()
.. )
- attr(*, "problems")=<externalptr>
# Lookup from four-letter park code (the names) to full park name (the values).
labs <- setNames(
  c(
    "Channel Islands National Park",
    "Death Valley National Park",
    "Joshua Tree National Park",
    "Kings Canyon National Park",
    "Lassen Volcanic National Park",
    "Yosemite National Park",
    "Sequoia National Park",
    "Redwood National Park",
    "Pinnacles National Park"
  ),
  c("CHIS", "DEVA", "JOTR", "KICA", "LAVO", "YOSE", "SEQU", "REDW", "PINN")
)
# Bar chart of the share of rows contributed by each park code.
# Every layer and scale must be chained with `+`: without it the percent
# y-scale is evaluated as a stand-alone statement and never reaches the plot.
bar_plot <- ggplot(
  data = ca,
  aes(x = code, y = ..count.. / sum(..count..), fill = factor(code))
) +
  geom_bar(color = "black") +
  labs(
    x = "National Parks",
    y = "Percentage of National Parks in the data-set",
    title = "Occurrence of the National Parks in the data-set"
  ) +
  # Replace the park codes on the axis with the full park names.
  scale_x_discrete(labels = labs) +
  # Show the proportions as percentages.
  scale_y_continuous(labels = scales::percent)
<ScaleContinuousPosition>
Range:
Limits: 0 -- 1
# Display the bar chart with flipped coordinates so the bars run horizontally.
bar_plot + coord_flip()
# Kernel density estimate of the visitor counts on a log10 scale.
# The axis label and title name the plotted quantity, log10(visitors).
density_plot <- ggplot(data = ca, aes(x = log10(visitors))) +
  geom_density(fill = "indianred3") +
  labs(
    x = "log10(visitors)",
    y = "density",
    title = "Kernel density of log10(visitors)"
  )
density_plot
# Font settings kept as plain lists (family, plus optional size and color).
t <- list(
  family = "Helvetica",
  size = 14,
  color = "blue"
)
t1 <- list(
  family = "Times New Roman",
  color = "red"
)
t2 <- list(
  family = "Courier New",
  size = 14,
  color = "green"
)
t3 <- list(family = "Arial")
# Interactive scatter plot of visitors against year, one colour per park.
# An explicit palette is supplied because the default Set2 palette holds at
# most 8 colours, fewer than the number of parks, which makes RColorBrewer
# warn on every render.
scatter_plot <- plot_ly(
  data = ca,
  x = ~year,
  y = ~visitors,
  color = ~park_name,
  colors = "Set1",
  type = "scatter",
  mode = "markers"
) %>%
  layout(
    # Titles describe the plotted columns: year (x), visitors (y), park (colour).
    title = list(text = "<b>Visitors per year by park"),
    legend = list(title = list(text = "<b>Park")),
    xaxis = list(title = list(text = "<b>Year")),
    yaxis = list(title = list(text = "<b>Visitors")),
    plot_bgcolor = "#e5ecf6"
  )
scatter_plot
Warning in RColorBrewer::brewer.pal(N, "Set2") :
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(N, "Set2") :
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(N, "Set2") :
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(N, "Set2") :
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
# Grouped bar chart of every column other than park_name, plotted per park.
# The reshape uses tidyr::pivot_longer(): data.table::melt() on a tibble only
# works through a deprecated reshape2 redirection that is announced to become
# an error. Values are converted to character because the reshaped columns mix
# character and numeric types; the factor keeps the variables in column order.
r_group_barchart <- ca %>%
  pivot_longer(
    cols = -park_name,
    names_to = "variable",
    values_to = "value",
    values_transform = list(value = as.character)
  ) %>%
  mutate(variable = factor(variable, levels = unique(variable))) %>%
  plot_ly(
    x = ~park_name,
    y = ~value,
    type = "bar",
    name = ~variable,
    color = ~variable
  ) %>%
  layout(
    title = list(text = "<b>Values of each variable by park"),
    legend = list(title = list(text = "<b>Variable")),
    xaxis = list(title = list(text = "<b>Park")),
    # The axis title text belongs inside title = list(text = ...).
    yaxis = list(title = list(text = "<b>Value")),
    barmode = "group"
  )
Warning in data.table::melt(ca, id.vars = "park_name") :
The melt generic in data.table has been passed a spec_tbl_df and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is deprecated, and this redirection is now deprecated as well. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend the namespace like reshape2::melt(ca). In the next version, this warning will become an error.
# Render the grouped bar chart.
r_group_barchart
# Count the rows recorded for each park; the resulting data frame has the
# park name in Var1 and the row count in Freq.
df_order <- data.frame(table(ca$park_name))
df_order
# Pie chart of each park's share of the rows, labelled with name and percent.
pie_chart <- plot_ly(
  type = "pie",
  labels = df_order$Var1,
  values = df_order$Freq,
  textinfo = "label+percent",
  insidetextorientation = "radial"
) %>%
  layout(
    # The slices are parks, so the title and legend say so.
    title = list(text = "<b>Share of records per park"),
    legend = list(title = list(text = "<b>Park"))
  )
pie_chart
# Histogram of the natural log of the visitor counts, one trace per park code.
histogram_plot <- plot_ly(
  data = ca,
  x = ~(log(visitors)),
  name = ~code,
  type = "histogram"
) %>%
  layout(
    # Titles describe what is plotted: log(visitors) on x, park code per trace.
    title = list(text = "<b>Distribution of log(visitors) by park code"),
    legend = list(title = list(text = "<b>Park code")),
    xaxis = list(title = list(text = "<b>log(visitors)")),
    yaxis = list(title = list(text = "<b>Count"))
  )
histogram_plot
# Count the rows recorded for each park code; the resulting data frame has the
# code in Var1 and the row count in Freq.
df_vore <- data.frame(table(ca$code))
df_vore
# Donut chart (a pie with a hole) of each park code's share of the rows.
donut_chart <- plot_ly(
  labels = df_vore$Var1,
  values = df_vore$Freq,
  textinfo = "label+percent"
) %>%
  add_pie(hole = 0.6) %>%
  layout(
    # The slices are park codes, so the title and legend say so.
    title = list(text = "<b>Share of records per park code"),
    legend = list(title = list(text = "<b>Park code"))
  )
donut_chart
library(gganimate)
library(gifski)
#scatter_plot_animate = ggplot(data=ca, aes(year, visitors)) + geom_point() +
# transition_states(park_name)
#animate(scatter_plot_animate, renderer = gifski_renderer())
# Keep only the rows whose park code is CHIS, DEVA or JOTR.
d <- filter(ca, code %in% c("CHIS", "DEVA", "JOTR"))
d
#line_plot = ggplot(d, aes(x=year, y=visitors, group=code, color=code)) +
# geom_line() + geom_point() +
# transition_reveal(year)
#animate(line_plot, width=300, height=300, renderer = gifski_renderer())
Channel Islands National Park started receiving visitors in the 1960s (the latest of the three parks) and has had very low visitor numbers to date. Joshua Tree National Park shows continuous growth in visitors without a big drop. Death Valley National Park shows a gradual pick-up and continuous growth in visitors in the 2010s.
library(dplyr)
# Map outline data for the "state" map, one row per polygon vertex.
state_map <- map_data("state")
head(state_map)
# Built-in state.x77 statistics as a data frame, with the lower-cased row names
# stored in `region`, the key used for the join.
data <- as.data.frame(state.x77)
data$region <- tolower(rownames(data))
head(data)
# Attach the statistics to the map rows, keeping every map row.
data_join <- left_join(state_map, data, by = "region")
head(data_join)
# Choropleth of the states, shaded by the Population column from state.x77.
# `group` and `fill` refer to columns of data_join, so they are mapped inside
# aes(); the outline colour is a constant and stays outside it.
ggplot(data = data_join) +
  geom_polygon(
    aes(x = long, y = lat, group = group, fill = Population),
    color = "white"
  )
Error in geom_ploygon(aes(x = long, y = lat), group = group, color = "white", :
could not find function "geom_ploygon"